# -*- coding: utf-8 -*-
"""
@Time    : 2024/8/28 13:52 
@Author  : ZhangShenao 
@File    : embedding.py 
@Desc    : 使用OpenAI的 `text-embedding-3-small` Embedding模型,实现文本嵌入
"""
import os
from typing import List

import dotenv
from openai import OpenAI

from load_review import load_review_data


def text_embedding(text: str, client: OpenAI, model: str = "text-embedding-3-small") -> List[float]:
    """Return the embedding vector for ``text``.

    Sends one request through ``client.embeddings.create`` and returns the
    ``embedding`` of the first item in the response's ``data`` list.

    Args:
        text: Text to embed. Must not be empty.
        client: Client object whose ``embeddings.create`` method is called.
        model: Name of the embedding model forwarded to the API.

    Returns:
        The value of ``resp.data[0].embedding``.

    Raises:
        ValueError: If ``text`` is empty or ``None``.
    """
    # The embeddings endpoint rejects empty input, so fail fast with a clear
    # message instead of spending a network round trip on a guaranteed error.
    if not text:
        raise ValueError("text must be a non-empty string")

    resp = client.embeddings.create(
        model=model,
        input=text,
        encoding_format="float",
    )

    # Only one input is sent, so only the first result is relevant.
    return resp.data[0].embedding


if __name__ == '__main__':
    # Load environment variables (dotenv reads them from a .env file).
    dotenv.load_dotenv()

    # Create the OpenAI client; base_url is taken from OPENAI_API_BASE
    # (os.getenv yields None when the variable is unset).
    client = OpenAI(base_url=os.getenv("OPENAI_API_BASE"))

    # Make sure the output directory exists before any embedding request is
    # sent; otherwise a missing directory would only surface at to_csv time,
    # after every per-row API call has already been made.
    output_path = "./data/fine_food_reviews_with_embeddings_1k_1126.csv"
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    # Load the raw review data.
    df = load_review_data()

    # Embed each row's `combined` value (one API request per row).
    df["embedding"] = df.combined.apply(lambda x: text_embedding(x, client))

    # Save the data together with the embeddings to a CSV file.
    df.to_csv(path_or_buf=output_path)

    print("评论内容Embedding完成")
